{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 载入数据集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import cv2\n",
    "import numpy as np\n",
    "from tqdm import tqdm\n",
    "\n",
    "n = 25000\n",
    "width = 299\n",
    "\n",
    "X = np.zeros((n, width, width, 3), dtype=np.uint8)\n",
    "y = np.zeros((n,), dtype=np.uint8)\n",
    "\n",
    "for i in tqdm(range(n/2)):\n",
    "    X[i] = cv2.resize(cv2.imread('train/cat.%d.jpg' % i), (width, width))\n",
    "    X[i+n/2] = cv2.resize(cv2.imread('train/dog.%d.jpg' % i), (width, width))\n",
    "\n",
    "y[n/2:] = 1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 提取特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from keras.layers import *\n",
    "from keras.models import *\n",
    "from keras.applications import *\n",
    "from keras.optimizers import *\n",
    "from keras.applications.inception_v3 import preprocess_input\n",
    "\n",
    "from keras.backend.tensorflow_backend import set_session\n",
    "import tensorflow as tf\n",
    "config = tf.ConfigProto()\n",
    "config.gpu_options.allow_growth=True\n",
    "set_session(tf.Session(config=config))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_features(MODEL, data=X):\n",
    "    cnn_model = MODEL(include_top=False, input_shape=(width, width, 3), weights='imagenet')\n",
    "    \n",
    "    inputs = Input((width, width, 3))\n",
    "    x = inputs\n",
    "    x = Lambda(preprocess_input, name='preprocessing')(x)\n",
    "    x = cnn_model(x)\n",
    "    x = GlobalAveragePooling2D()(x)\n",
    "    cnn_model = Model(inputs, x)\n",
    "\n",
    "    #features = cnn_model.predict(data, batch_size=64, verbose=1)\n",
    "    features = cnn_model.predict(data, batch_size=32, verbose=1)\n",
    "    return features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "inception_features = get_features(InceptionV3, X)\n",
    "xception_features = get_features(Xception, X)\n",
    "features = np.concatenate([inception_features, xception_features], axis=-1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 融合模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "inputs = Input(features.shape[1:])\n",
    "x = inputs\n",
    "x = Dropout(0.5)(x)\n",
    "x = Dense(1, activation='sigmoid')(x)\n",
    "model = Model(inputs, x)\n",
    "model.compile(optimizer='adam',\n",
    "              loss='binary_crossentropy',\n",
    "              metrics=['accuracy'])\n",
    "h = model.fit(features, y, batch_size=128, epochs=10, validation_split=0.2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import SVG\n",
    "from keras.utils.vis_utils import model_to_dot\n",
    "\n",
    "SVG(model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "%matplotlib inline\n",
    "%config InlineBackend.figure_format = 'retina'\n",
    "\n",
    "plt.figure(figsize=(10, 4))\n",
    "plt.subplot(1, 2, 1)\n",
    "plt.plot(h.history['loss'])\n",
    "plt.plot(h.history['val_loss'])\n",
    "plt.legend(['loss', 'val_loss'])\n",
    "plt.ylabel('loss')\n",
    "plt.xlabel('epoch')\n",
    "\n",
    "plt.subplot(1, 2, 2)\n",
    "plt.plot(h.history['acc'])\n",
    "plt.plot(h.history['val_acc'])\n",
    "plt.legend(['acc', 'val_acc'])\n",
    "plt.ylabel('acc')\n",
    "plt.xlabel('epoch')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 在测试集上预测并提交到 kaggle 评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "n = 12500\n",
    "X_test = np.zeros((n, width, width, 3), dtype=np.uint8)\n",
    "\n",
    "for i in tqdm(range(n)):\n",
    "    X_test[i] = cv2.resize(cv2.imread('test/%d.jpg' % (i+1)), (width, width))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "inception_features = get_features(InceptionV3, X_test)\n",
    "xception_features = get_features(Xception, X_test)\n",
    "features_test = np.concatenate([inception_features, xception_features], axis=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "y_pred = model.predict(features_test, batch_size=128)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('sample_submission.csv')\n",
    "df['label'] = y_pred.clip(min=0.005, max=0.995)\n",
    "df.to_csv('pred.csv', index=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
